/*******************************
name:		kayla freeman
date:		8/1/24
purpose:	prep 2 files tracking default exposure of lender as of 1)obs year; 2)package startdate
*******************************/

clear

/*to start, get gvs with fyear and datadate for measuring defaulters etc.*/
use "01_comp_basic_raw_vars"
drop if missing(gvkey)
drop if missing(fyear)
destring gvkey,replace
sort gvkey fyear at
by gvkey fyear: keep if _n==1
keep gvkey fyear datadate
gen date = mdy(month(datadate),1,year(datadate))
format date %td
drop datadate
ren fyear year
tempfile compers
save `compers'

clear
use 03_ratings/*dataset of Compustat ratings*/
destring gvkey,replace
tab splticrm
tab spsdrm
tab spsticrm

/*call it a d if any of the 3 are d*/

gen default = 0
replace default = 1 if splticrm=="D" | spsticrm=="D" | spsticrm=="D"
tab def 

gen year = year(datadate)
tab year /*ratings data spotty after 2015*/
drop if year>2015

/*proc - exec dec: do this month by month, then i can agg it up by s fyears if i want later*/
gen month = month(datadate)
sort gvkey year
gen date = mdy(month,1,year)
format date  %td
keep gvkey default year month date
gen modt = mofd(date)
format modt %tm
sort gvkey modt
tempfile justasec
save `justasec'

keep if default==1 
tempfile baduns
save `baduns'

clear
use `justasec'
sort gvkey date
forval i = 0/23{
	by gvkey: gen def_`i' = default[_n-`i'] if modt[_n-`i'] == modt-`i'
}
	egen def12mo = rowtotal(def_0 def_1 def_2 def_3 def_4 def_5 def_6 def_7 def_8 def_9 def_10 def_11)
	egen def24mo = rowtotal(def12mo def_12 def_13 def_14 def_15 def_16 def_17 def_18 def_19 def_20 def_21 def_22 def_23)
replace def12mo = 1 if def12mo>0
replace def24mo = 1 if def24mo>0
drop def_*
keep gvkey date def12 def24
merge 1:1 gvkey date using `compers'
drop if _merge==1 
tab year if _merge==2
drop if year(date)<1970 |year(date)>2015
drop _merge

tempfile gvyrs
save `gvyrs'
/**/
clear
import excel using "DS_Links2018", firstrow case(lower) sheet(link_data)
drop if year(facstartdate)>2015 /* default data gets spotty after 2015*/

ren facid facilityid

ren discrep_bcoid_flag linkflag
replace linkflag =1 if discrep_comp==0 & discrep_tick==0
drop company coname_h fic score* smbl* ticker discrep*
sort facilityid
tempfile inprog
save `inprog'

clear
use 00_facility
gen year = substr(facilitystartdate,1,4)
destring year,replace
drop if year >2016
drop year
tempfile fac
save `fac'
clear 
use 00_package
keep packageid borrowercompanyid
sort packageid
bysort packageid: keep if _n==1
merge 1:m packageid using `fac'
keep if _merge==3
drop _merge
ren borrowercompanyid bcoid
merge 1:1 facilityid bcoid using `inprog'  

keep if _merge==3
drop _merge

/*fix facilitystartdate*/
gen _year = substr(facilitystartdate,1,4)
gen _mo = substr(facilitystartdate,6,2)
gen _day = 1
destring _year _mo _day,replace
gen altst = mdy(_mo,_day,_year)
format altst %td
drop facilitystartdate _year _mo _day
ren altst facilitystartdate
/*fix facilityenddate*/
gen _year = substr(facilityenddate,1,4)
gen _mo = substr(facilityenddate,6,2)
gen _day = 1 
destring _year _mo _day,replace
gen altst = mdy(_mo,_day,_year)
format altst %td
drop facilityenddate _year _mo _day
ren altst facilityenddate

count if facstartdate!=facilitystartdate 
replace linkflag = 1 if facstartdate!=facilitystartdate
order facstartdate facilitystart /*keep earlier*/
replace facstartdate = facilitystartdate if facilitystartdate<facstartdate

/*drop unnecessary vars*/
drop bcoid   facilitystartdate   loantype    secured     
gen facstart = mdy(month(facstartdate),1,year(facstartdate)) /*first day of mo to match the fyr-end data*/
format facstart %td
drop facstartdate
gen facend = mdy(month(facilityenddate),1,year(facilityenddate)) /*first day of mo to match the fyr-end data*/
format facend %td
drop facilityenddate

ren facstart date
sort facilityid date 
by facilityid: replace date = date[1]
replace facend = date if missing(facend) /*gonna keep the latest facend, so this helps drop out the missings*/
sort facilityid facend
by facilityid: replace facend = facend[_N]
by facilityid: keep if _n==1
ren facend enddt
keep packageid facilityid  date enddt gvkey 

ren facilityid facid
expand 2
sort facid date

gen modt = mofd(date)
by facid: replace modt = mofd(enddt) if _n==2
format modt %tm
bysort facid modt: keep if _n==1
tsset facid modt
tsfill
sort facid modt
by facid: replace gvkey = gvkey[1]
drop date enddt
/* at this point: have a running series of months in which a facid is outstanding, with gvkey. next up, merge into lenders*/

ren facid facilityid
joinby facilityid using 10_leads.dta,unmatched(master)
gen nolead = _merge==1
drop _merge

merge m:1 gvkey modt using `baduns'
drop if _merge==2
replace default = 0 if _merge==1
drop year month  packageid facilityid _merge

drop date

gen date = dofm(modt)
format date %d

sort companyid modt gvkey
by companyid modt gvkey: keep if _n==1 /*count defaulters by gvkey, not by facid*/
tempfile gvkruns
save `gvkruns'

gen year = year(date)
gen month = month(date)
forval i = 1/12{
	preserve
	replace year = year+1 if month>`i'
	sort companyid year gvkey default
	collapse (max) default,by(companyid year gvkey) /*ensures a defaulter is counted only once per year*/
	bys companyid year: gen numgvs = _N
	collapse (sum) default (first) numgvs,by(companyid year) /* counts defaulters within last 12 mos*/
	sort companyid year
	by companyid: 	gen lagdefault = default[_n-1] /*gives lagged defaults*/
	by companyid: gen lagnumgvs = numgvs[_n-1]
	gen date = mdy(`i',1,year)
	tempfile _`i'
	save `_`i''
	restore
}
clear
forval i = 1/12{
	append using `_`i''
}
sort companyid year date
by companyid year date: gen dup = cond(_N==1,0,_n)
tab dup /*none*/
drop dup
format date %td
tab default
by companyid: egen ever = total(default)
	replace ever = 1 if ever>0

/*here a split - need a gvkey-year panel for appendix table; need package-level for k analysis*/
tempfile split
save `split'
gen percdef = default/numgvs
gen lagpercdef = lagdefault/lagnumgvs
sum percdef lagpercdef,det

merge 1:m companyid date using `gvkruns' 
keep if _merge==3
drop _merge

gen percdef2 =percdef

collapse  (max) percdef default nolead (mean) percdef2,by(gvkey date)

merge 1:1 gvkey date using `gvyrs'

drop if _merge==1
drop if _merge==2
drop _merge

sort gvkey year
by gvkey year: gen dup = cond(_N==1,0,_n)
tab dup/*none*/
drop dup
save 11_portdefaults,replace


clear
use `split'
drop if missing(companyid)
keep companyid date default numgvs lagdefault lagnumgvs
tempfile rundate
save `rundate'

clear
use 00_facility 
gen year = substr(facilitystartdate,1,4)
destring year,replace
drop if year >2016
drop year
tempfile fac
save `fac'
clear 
use 00_package 
keep packageid borrowercompanyid
sort packageid
bysort packageid: keep if _n==1
merge 1:m packageid using `fac'
keep if _merge==3
drop _merge
ren borrowercompanyid bcoid

/*fix facilitystartdate*/
gen _year = substr(facilitystartdate,1,4)
gen _mo = substr(facilitystartdate,6,2)
gen _day = 1/*substr(facilitystartdate,9,2)*/
destring _year _mo _day,replace
gen altst = mdy(_mo,_day,_year)
format altst %td
drop facilitystartdate _year _mo _day
ren altst facilitystartdate
keep packageid facilityid facilitystartdate 


joinby facilityid using 10_leads.dta,unmatched(master)
gen nolead = _merge==1
drop _merge
drop if missing(companyid)

sort packageid companyid facilitystartdate
bys package companyid: keep if _n==1
keep packageid companyid facilitystartdat 
ren fac date
merge m:1 companyid date using `rundate'
keep if _m==3
drop _m /*at this point - have the num of defaults in the last two years at the outset of package. need to collapse at package level*/

gen percdef = default/numgvs
ren date packdate
collapse (max) percdef  (min) packdate,by(packageid)

tempfile percy
save `percy'

clear
use 07b_kdata_pkg

drop conc*
drop kcon_overexceptions
sort gvkey year
capture drop hasak
sort gvkey packageid year
bysort gvkey packageid year: keep if _n==1
bysort gvkey year: egen hasak = total(kcon)
order gvkey year packageid hasak
tab hasak 
merge m:1 packageid using `percy'
keep if _m==3
drop _m

sort gvkey year valid kcon_elig
bysort gvkey year valid kcon_elig: gen dup = cond(_N==1,0,_n)
tab dup /* multiple dups; need to prioritize contracts that have conc recs the most, then eligibility reqs, then hasak, then valid, then everyone else.*/
gen thedef = .
/*concs*/
gen conc = 0
	replace conc = 1 if kcon_credlimit==1 |kcon_hasconc==1
bys gvkey year: egen hasconc = total(conc)
	replace hasconc = 1 if hasconc>0
bys gvkey year hasconc: egen concdef = max(percdef)
	replace thedef = concdef if hasconc==1
	sort gvkey year thedef
	by gvkey year: replace thedef = thedef[1]
/*eligs*/
capture drop haselig
gen elig = 0
	replace elig = 1 if kcon_elig==1
bys gvkey year: egen haselig = total(elig)
	replace haselig = 1 if haselig>0
bys gvkey year haselig: egen eligdef = max(percdef)
	replace thedef = eligdef if haselig==1 & missing(thedef)
/*hasaks*/
bys gvkey year: egen hask = total(hasak)
	replace hask = 1 if hask>0
bys gvkey year hask: egen haskdef = max(percdef)
	replace thedef = haskdef if hask==1 & missing(thedef)
/*valids*/
bys gvkey year: egen hasval = total(valid)
	replace hasval = 1 if hasval>0
bys gvkey year hasval: egen hasvaldef = max(percdef)
	replace thedef = hasvaldef if hasval==1 & missing(thedef)
/*!missing valids*/
gen nomiss = !missing(valid)
bys gvkey year: egen there = total(nomiss)
	replace there = 1 if there>0
bys gvkey year there: egen theredef = max(percdef)
	replace thedef = theredef if there==1 & missing(thedef)
	replace thedef = theredef if there==0 & missing(thedef)

ren packdate date
merge m:1 gvkey date using `gvyrs'
	drop if _m==2
	drop _m
	gen lasttwo = def12+def24

collapse (first) thedef (max) lasttwo,by(gvkey year)
ren thedef pack_maxpercdef
ren gvkey s_gvkey
save 11b_portdefaults_pkgdate,replace